%matplotlib ipympl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import timedelta
def load_try_encodings(file: str, encodings=("utf-8", "ISO-8859-1")):
    """
    Read a pipe-delimited text file, trying several encodings in turn.

    Parameters
    ----------
    file : str
        Path of the ``|``-delimited file to read.
    encodings : str or sequence of str, optional
        Encodings to attempt, in order. The first one that decodes the
        file without a ``UnicodeDecodeError`` wins. Defaults to UTF-8
        followed by ISO-8859-1.

    Returns
    -------
    pd.DataFrame

    Raises
    ------
    ValueError
        If *encodings* is empty.
    UnicodeDecodeError
        If the file cannot be decoded with the last encoding either.
    """
    if isinstance(encodings, str):
        # a bare string would otherwise be unpacked character by character
        encodings = (encodings,)
    if not encodings:
        raise ValueError("at least one encoding is required")
    *candidates, last_resort = encodings
    for encoding in candidates:
        try:
            return pd.read_csv(file, delimiter="|", encoding=encoding)
        except UnicodeDecodeError:
            continue
    # let a failure of the final candidate propagate to the caller
    return pd.read_csv(file, delimiter="|", encoding=last_resort)
def load_year(voter_history: str, voter_list: str, ):
    """
    Load one election's voter list and flag who voted.

    Parameters
    ----------
    voter_history : str
        Path to the pipe-delimited voter history file. Must contain the
        columns "Voter ID Number" and "Election Date".
    voter_list : str
        Path to the pipe-delimited voter list file. Must contain the
        columns "Voter ID Number" and "Date of Birth".

    Returns
    -------
    pd.DataFrame
        The voter list indexed by "Voter ID Number" with two extra columns:
        ``age`` (completed years on election day) and ``voted`` (True when
        the voter ID also appears in the history file).
    """
    history = load_try_encodings(voter_history)
    vlist = load_try_encodings(voter_list)

    # column headers in the raw files carry stray whitespace
    history.rename({c:c.strip() for c in history.columns}, inplace=True, axis=1)
    vlist.rename({c:c.strip() for c in vlist.columns}, inplace=True, axis=1)

    history = history.set_index("Voter ID Number")
    vlist = vlist.set_index("Voter ID Number")

    # manual corrections
    drop_ids = [
        # erroneously entered reg with birth year of 1191
        # re-registered with different id the same year
        "02CLS2791002",
        # 2015 birthdays that are unclear how to correct
        "01GSR0112000", # 01/01/1812
        "01MCN0112006", # 01/01/1812
        "01ANE0112001", # 01/01/1812
        "01WXO0109000", # 01/01/1809
        # 2013 birthdays that unclear how to correct
        "01ACE0108001", # 01/01/1808
        "01SEN0108008", # 01/01/1808
        "01HPL0108001", # 01/01/1808
        "01RKL0108001", # 01/01/1808
        "01ARA0108004", # 01/01/1808
        "01MRT0108003", # 01/01/1808
        "01DOA0107001", # 01/01/1807
        "01CBN0108003", # 01/01/1808
        "01BRD0108006", # 01/01/1808
        "01PAN0107002", # 01/01/1807
        "01VAY0105002", # 01/01/1805
        "01LNN0107002", # 01/01/1807
        "01MJA0107011", # 01/01/1807
        # 2012 Birthdays unclear how to correct
        "09GAA0487001", # 09/04/1487
        # 2011 birthday that unclear how to correct
        "01QLO0105000", # 01/01/1805
        "01KAA0106007", # 01/01/1806
        "01LSH0108007", # 01/01/1808
        "01KJN0111026", # 01/01/1811
        "01AMN0108001", # 01/01/1808
    ]
    vlist = vlist.drop(drop_ids, errors = 'ignore')
    history = history.drop(drop_ids, errors='ignore')

    # Records listed in the original notes as needing a hand correction:
    #   04WDA0180001 (2011), 12GDA0186001, 01DML1591001, 08WJB0481002
    # e.g. a birth year entered as 0980 was modified to 1980.

    birth_dates = pd.to_datetime(vlist['Date of Birth'])
    # extract from the file
    # this will break if multiple elec in same year
    elec_date = pd.to_datetime(history['Election Date'].iloc[0])

    # Calendar-based age: dividing the elapsed days by 365.2425 rounds a voter
    # down by a year on (or just after) their birthday, e.g. someone turning
    # 18 on election day came out as 17.
    birthday_pending = (birth_dates.dt.month > elec_date.month) | (
        (birth_dates.dt.month == elec_date.month)
        & (birth_dates.dt.day > elec_date.day)
    )
    vlist['age'] = (
        elec_date.year - birth_dates.dt.year - birthday_pending.astype(int)
    )

    # a voter "voted" when their ID shows up in this election's history file
    vlist['voted'] = vlist.index.isin(history.index)
    return vlist
# (voter history file, voter list file) pairs, in the order they are loaded:
# even (state/federal) years first, then odd years, each newest to oldest.
_election_files = (
    ("11-8-22 Voter History 49ANP_269498.txt", "49VOT_267488 nov 2022 voting list.txt"),
    ("11-3-20 Voter History 49ANP_225530.txt", "49VOT_224084 november 2020 election.txt"),
    ("11-6-18 Voter History 49ANP_162771.txt", "49VOT_162354 - Nov 6 2018 Election.txt"),
    ("11-8-16 Voter History 49ANP_140283.txt", "49VOT_139226 - Nov 8 2016.txt"),
    ("11-4-2014 State Election 49ANP_120872.txt", "49VOT_120372 - nov 4 2014.txt"),
    ("11.6.2012 StatePres 49ANP_103892.txt", "49VOT_103340 Nov 2012 election.txt"),
    ("11-2-21 Voter History 49ANP_239643.txt", "49VOT_238743 Nov 2021 election.txt"),
    ("11-5-19 Voter History 49ANP_202374.txt", "49VOT_199524 - Nov 2019 election.txt"),
    ("11.7.17 Voter History 49ANP_150723.txt", "49VOT_150177 - nov 7 2017.txt"),
    ("11.3.15 Voter History 49ANP_129528.txt", "49VOT_128567 - Nov 3, 2015.txt"),
    ("11.5.13 Municipal Election 49ANP_112159.txt", "49VOT_111500 - nov 5, 2013.txt"),
    ("11.8.2011 Voter History 49ANP_91255.txt", "49VOT_90931 Nov 2011 election.txt"),
)
(
    voters_2022,
    voters_2020,
    voters_2018,
    voters_2016,
    voters_2014,
    voters_2012,
    voters_2021,
    voters_2019,
    voters_2017,
    voters_2015,
    voters_2013,
    voters_2011,
) = (load_year(history_file, list_file) for history_file, list_file in _election_files)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[3], line 1
----> 1 voters_2022 = load_year("11-8-22 Voter History 49ANP_269498.txt", "49VOT_267488 nov 2022 voting list.txt")
2 voters_2020 = load_year("11-3-20 Voter History 49ANP_225530.txt", "49VOT_224084 november 2020 election.txt")
3 voters_2018 = load_year("11-6-18 Voter History 49ANP_162771.txt", "49VOT_162354 - Nov 6 2018 Election.txt")
Cell In[2], line 8, in load_year(voter_history, voter_list)
7 def load_year(voter_history: str, voter_list: str, ):
----> 8 history = load_try_encodings(voter_history)
9 vlist = load_try_encodings(voter_list)
10 # history = pd.read_csv(voter_history, delimiter="|", encoding="utf-8")
11 # except UnicodeDecodeError:
12 # history = pd.read_csv(voter_history, delimiter="|", encoding="ISO-8859-1")
13 # vlist = pd.read_csv(voter_list, delimiter="|", encoding = "ISO-8859-1")
14 # history = pd.read_csv("11-2-21 Voter History 49ANP_239643.txt", delimiter="|")
15 # vlist = pd.read_csv("49VOT_238743 Nov 2021 election.txt", delimiter="|", encoding = "ISO-8859-1")
Cell In[2], line 3, in load_try_encodings(file)
1 def load_try_encodings(file: str):
2 try:
----> 3 return pd.read_csv(file, delimiter="|", encoding="utf-8")
4 except UnicodeDecodeError:
5 return pd.read_csv(file, delimiter="|", encoding="ISO-8859-1")
File ~/mambaforge/envs/voters/lib/python3.11/site-packages/pandas/io/parsers/readers.py:948, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
944 dtype_backend=dtype_backend,
945 )
946 kwds.update(kwds_defaults)
--> 948 return _read(filepath_or_buffer, kwds)
File ~/mambaforge/envs/voters/lib/python3.11/site-packages/pandas/io/parsers/readers.py:611, in _read(filepath_or_buffer, kwds)
608 _validate_names(kwds.get("names", None))
610 # Create the parser.
--> 611 parser = TextFileReader(filepath_or_buffer, **kwds)
613 if chunksize or iterator:
614 return parser
File ~/mambaforge/envs/voters/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1448, in TextFileReader.__init__(self, f, engine, **kwds)
1445 self.options["has_index_names"] = kwds["has_index_names"]
1447 self.handles: IOHandles | None = None
-> 1448 self._engine = self._make_engine(f, self.engine)
File ~/mambaforge/envs/voters/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1705, in TextFileReader._make_engine(self, f, engine)
1703 if "b" not in mode:
1704 mode += "b"
-> 1705 self.handles = get_handle(
1706 f,
1707 mode,
1708 encoding=self.options.get("encoding", None),
1709 compression=self.options.get("compression", None),
1710 memory_map=self.options.get("memory_map", False),
1711 is_text=is_text,
1712 errors=self.options.get("encoding_errors", "strict"),
1713 storage_options=self.options.get("storage_options", None),
1714 )
1715 assert self.handles is not None
1716 f = self.handles.handle
File ~/mambaforge/envs/voters/lib/python3.11/site-packages/pandas/io/common.py:872, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
863 handle = open(
864 handle,
865 ioargs.mode,
(...)
868 newline="",
869 )
870 else:
871 # Binary mode
--> 872 handle = open(handle, ioargs.mode)
873 handles.append(handle)
875 # Convert BytesIO or file objects passed with an encoding
FileNotFoundError: [Errno 2] No such file or directory: '11-8-22 Voter History 49ANP_269498.txt'
# Columns kept in the combined table, in display order. Commented-out entries
# are columns that are deliberately left out.
col_order = [
    'Last Name',
    'Middle Name',
    'First Name',
    'voted',
    'age',
    'Date of Birth',
    'Date of Registration',
    'Residential Address Street Number',
    'Residential Address Street Name',
    'univ_housing_name',
    'Residential Address Street Suffix',
    'Residential Address Apartment Number',
    'Residential Address Zip Code',
    # 'Mailing Address - Street Number and Name',
    # 'Mailing Address - Apartment Number',
    # 'Mailing Address - City or Town',
    # 'Mailing Address - State',
    # 'Mailing Address - Zip Code',
    'Gender F/M',
    'Voter Status',
    'Party Affiliation',
    'Ward Number',
    'Precinct Number',
    'Congressional District Number',
    'Senatorial District Number',
    'State Representative District',
    # 'Unnamed: 26',
    # 'Unnamed: 25',
    # 'Record Sequence Number',
    # 'Title',
]
# Stack the per-year tables. `keys` adds an outer index level holding the
# election year; the list order (2011..2022) must match np.arange(2011, 2023).
voters = pd.concat(
    [
        voters_2011,
        voters_2012,
        voters_2013,
        voters_2014,
        voters_2015,
        voters_2016,
        voters_2017,
        voters_2018,
        voters_2019,
        voters_2020,
        voters_2021,
        voters_2022,
    ],
    keys = np.arange(2011, 2023)
)
# voters['Mailing Address ¿ Street Number and Name'] = voters['Mailing Address ¿ Street Number and Name'].combine_first(voters['Mailing Address ? Street Number and Name'])
# voters = voters.drop('Mailing Address ? Street Number and Name',axis=1)
# voters = voters.rename({'Mailing Address ¿ Street Number and Name': 'Mailing Address - Street Number and Name'})
# add a university housing name column
# makes it easier to do things like groupby for MIT dorms
# "NA" is the placeholder for rows not (yet) matched to a building
voters['univ_housing_name'] = "NA"
# must come after the column above is created, since col_order lists it
voters = voters[col_order]
voters.index = voters.index.set_names(["year", "Voter ID Number"])
# notebook display of the combined table
voters
def turnout_by_year_key(df, key, binn=None):
    """
    Calculate turnout per year based on the variable *key*.

    Parameters
    ----------
    df : pd.DataFrame
        Expected to have an outer (multi)index of *year* and a boolean
        column ``voted``.
    key : str
        The column to use for value_counts. e.g. "age"
    binn : optional
        Unused. Kept (now optional) so existing three-argument calls keep
        working while the two-argument calls used throughout this file no
        longer raise ``TypeError``.

    Returns
    -------
    pd.DataFrame
        Indexed by (year, *key*) with integer columns ``voted`` and
        ``registered`` and their ratio ``turnout``.
    """
    def _process_year(year_df):
        voted_counts = year_df[year_df['voted']][key].value_counts().sort_index()
        reg_counts = year_df[key].value_counts().sort_index()
        counts = pd.DataFrame({"voted": voted_counts, "registered": reg_counts})
        # values of *key* with registrations but no votes come out as NaN
        return counts.fillna(0).astype(int)

    # take the years from the frame that was passed in, not from a global
    years = df.index.unique(level=0)
    out = pd.concat([_process_year(df.loc[year]) for year in years], keys=years)
    out.index = out.index.set_names(["year", key])
    out['turnout'] = out['voted']/out['registered']
    return out
# Per-year, per-age counts of voted / registered and the resulting turnout.
df = turnout_by_year_key(voters, "age")
df
# Flatten the (year, age) index into ordinary columns so ages can be binned.
grouped = df.reset_index()
# 4-year bins with edges 18, 22, ..., 110; include_lowest keeps age 18 itself.
age_groups = pd.cut(grouped['age'], np.arange(18, 114, 4), include_lowest=True)
grouped['age_group'] = age_groups
# Sum the counts within each (year, bin). The summed 'age' column is
# meaningless and dropped; the summed 'turnout' is recomputed further down.
grouped = grouped.groupby(["year", "age_group"]).sum().sort_index().drop("age", axis=1).reset_index()
mid_points = [g.mid for g in grouped['age_group']]
grouped['mid_points'] = mid_points # convenience for plotting down the line
# transforming the intervals into strings for easy using the multiindex
# this can't be the best way to do this :(
# this is lowkey awful
# presumably np.round is there because the first bin's left edge sits just
# below 18 when include_lowest=True -- TODO confirm
grouped['age_group'] = [f"{int(np.round(g.left))}-{int(g.right)}" for g in grouped['age_group']]
grouped.index = pd.MultiIndex.from_frame(grouped[['year', 'age_group']])
grouped = grouped.drop(['year', 'age_group'], axis=1)
# turnout of the whole bin = total voted / total registered
grouped['turnout'] = grouped['voted'] / grouped['registered']
grouped
Turnout vs registration by age group (Municipal Elections)#
def turnout_bar_graph(df, ax=None):
    """
    Draw a stacked bar chart of voters vs. registered non-voters.

    Parameters
    ----------
    df : mapping of columns
        Must provide ``mid_points`` (bar centres), ``voted`` and
        ``registered``.
    ax : matplotlib Axes, optional
        Axes to draw on; the current pyplot axes are used when omitted.
    """
    target = plt.gca() if ax is None else ax
    centres = df['mid_points']
    n_voted = df['voted']
    n_stayed_home = df['registered'] - n_voted
    # green "voted" bars at the bottom, gray remainder stacked on top
    target.bar(centres, n_voted, width=3.75, color='tab:green', label="Voted")
    target.bar(
        centres,
        n_stayed_home,
        bottom=n_voted,
        width=3.75,
        color="gray",
        label="Registered - did not vote",
    )
    target.set_xlim([17, 85])
# Earlier 2x3 layout covering only the odd years, kept for reference:
# fig, axs = plt.subplots(2,3, constrained_layout=True, figsize=(12,6),sharex=True)
# turnout_bar_graph(grouped.loc[2021], ax=axs[0,0])
# axs[0,0].set_title("2021")
# turnout_bar_graph(grouped.loc[2019], ax=axs[0,1])
# axs[0,1].set_title("2019")
# turnout_bar_graph(grouped.loc[2017], ax=axs[0,2])
# axs[0,2].set_title("2017")
# turnout_bar_graph(grouped.loc[2015], ax=axs[1, 0])
# axs[1,0].set_title("2015")
# turnout_bar_graph(grouped.loc[2013], ax=axs[1, 1])
# axs[1,1].set_title("2013")
# turnout_bar_graph(grouped.loc[2011], ax=axs[1, 2])
# axs[1,2].set_title("2011")
# axs[1,1].set_xlabel("Age")
# plt.legend()
fig, axs = plt.subplots(3,3, figsize=(16,6), sharex=True, sharey=True)
# fig.suptitle("Harvard Grad Dorms")
# `years` is reused by the housing sections further down
years = np.arange(2011, 2023)
year = 2022
bar_width = .75
# One panel per election, newest first: the 9 axes cover 2022 down to 2014.
# NOTE(review): the loop extent is reconstructed; the source lost its indentation.
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    ax.set_title(f"{year}")
    turnout_bar_graph(grouped.loc[year], ax)
    # ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
fig.supxlabel("Age (4 year bins)")
# `ax` is the last panel visited by the loop, so only that panel gets a legend
ax.legend()
plt.tight_layout()
# plt.tight_layout()
# Horizontal registered-vs-voted bars for the 2022 election.
ages = np.arange(18, 79, 4)
groups = [f"{start}-{start+4}" for start in ages]
df = grouped.loc[2022]
bar_width = 3.75
fig, ax = plt.subplots(figsize=(6, 14))
# the full "registered" bar goes down first so the green "voted" bar overlays it
for column, colour, text in (
    ("registered", "gray", "Registered"),
    ("voted", "tab:green", "Voted"),
):
    ax.barh(df['mid_points'], df[column], height=bar_width, color=colour, label=text)
# ticks sit at the centre of each 4-year bin
ax.set_yticks(ages+2, labels=groups, fontsize=20)
ax.legend(fontsize=25)
# reversed limits put the youngest group at the top
ax.set_ylim([85, 17])
plt.tight_layout()
# One line per age group across election years: first the turnout fraction,
# then the raw number of votes.
ages = np.arange(18, 75, 4)
groups = [f"{start}-{start+4}" for start in ages]
colors = plt.cm.viridis(ages/ages.max())
for column, y_label in (("turnout", "Turnout %"), ("voted", "Turnout Number")):
    plt.figure()
    per_group = grouped[column]
    for group, colour in zip(groups, colors):
        # level 1 of the index is the age-group label
        per_group.xs(group, level=1).plot(label=group, style='o--', color=colour)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    plt.title("Turnout by Age and Year")
    plt.ylabel(y_label)
    plt.xlabel("Election Year")
    plt.tight_layout()
    plt.grid()
from cycler import cycler
years = np.arange(2011, 2023)
# Left panel: turnout percentage by age bin; right panel: raw vote counts.
fig, axs = plt.subplots(1,2,figsize=(12,4.5))
# tab20 colours so that each year gets its own colour
axs[0].set_prop_cycle(cycler(color=plt.get_cmap("tab20").colors))
axs[1].set_prop_cycle(cycler(color=plt.get_cmap("tab20").colors))
# NOTE(review): the loop extent is reconstructed; the source lost its indentation.
for year in years:
    # even years are drawn dashed and slightly transparent, odd years solid
    if year %2 == 0:
        alpha = .8
        linestyle='--'
    else:
        alpha = 1
        linestyle='-'
    axs[0].plot(grouped.loc[year]['mid_points'].values, grouped.loc[year]['turnout'].values*100, 'o', linestyle=linestyle, label=year, alpha=alpha)
    axs[1].plot(grouped.loc[year]['mid_points'].values, grouped.loc[year]['voted'].values, 'o',linestyle=linestyle, label=year, alpha=alpha)
axs[0].set_title("Turnout %")
axs[1].set_title("Turnout Numbers")
axs[0].set_ylabel("%")
axs[1].set_ylabel("Number of Votes Cast")
# axs[0].set_prop_cycle(cycler(color=plt.get_cmap('tab20c').colors))
# axs[1].set_prop_cycle(cycler(color=plt.get_cmap('tab20c').colors))
labelsize = 15
axs[0].grid()
axs[1].grid()
fig.supxlabel("Age (4 yr bins)", size=labelsize)
# pyplot-level call: the legend lands on the current axes
plt.legend()
plt.tight_layout()
# Odd-year elections only, newest first.
_odd_years = (2021, 2019, 2017, 2015, 2013, 2011)

# Figure 1: turnout fraction vs. age, using the tab20 colour cycle.
fig, ax = plt.subplots()
ax.set_prop_cycle(cycler(color=plt.get_cmap("tab20").colors))
plt.title("Turnout vs Age")
for _yr in _odd_years:
    _rows = grouped.loc[_yr]
    plt.plot(_rows['mid_points'].values, _rows['turnout'].values, 'o--', label=str(_yr), alpha=.8)
plt.legend()
plt.xlabel("Age (4 yr bins)")
plt.ylabel("Turnout %")

# Figures 2 and 3: the same layout for vote counts and registration counts.
for _column, _y_label in (
    ("voted", "Turnout (Vote count)"),
    ("registered", "Registered Voters"),
):
    plt.figure()
    plt.title("Turnout vs Age")
    plt.grid()
    for _yr in _odd_years:
        _rows = grouped.loc[_yr]
        plt.plot(_rows['mid_points'].values, _rows[_column].values, 'o--', label=str(_yr), alpha=.8)
    plt.legend()
    plt.xlabel("Age (4 yr bins)")
    plt.ylabel(_y_label)
    plt.tight_layout()
University Housing/Dorms#
from collections import defaultdict
from collections.abc import Iterable
def find_housing_idxs(df: pd.DataFrame, housing_locations:dict):
    """
    Tag voters living in the given buildings and return boolean row masks.

    Parameters
    ----------
    df : pd.DataFrame
        Must have the columns 'Residential Address Street Name' and
        'Residential Address Street Number'. Its 'univ_housing_name' column
        is overwritten **in place** for every matching row.
    housing_locations : dict
        Maps a building name to ``(street_number, street_name)``, or to a
        list of such pairs for a complex with several addresses.
        ``street_number`` may be None (any number on that street), a single
        number, or an iterable of valid numbers.

    Returns
    -------
    defaultdict
        Building name -> boolean array of length ``len(df)``, plus the key
        ``'all'`` holding the union of every building's mask.

    Notes
    -----
    A row matches when its street name is a substring of the requested
    ``street_name`` (not the other way round). This is kept from the
    original implementation because some entries rely on it, e.g. a
    requested "MEMORIAL DRIVE" matching rows recorded as "MEMORIAL DR".
    NOTE(review): an exact comparison would be stricter; confirm against
    the data before changing it.
    """
    n_rows = len(df)
    street_names = df['Residential Address Street Name']
    street_numbers = df['Residential Address Street Number']
    indices = defaultdict(lambda : np.zeros(n_rows, dtype=bool))

    def _find_idx(street_num, street_name):
        # Missing (NaN) street names used to raise TypeError and an empty
        # string would match every building; both are treated as no match.
        mask = np.fromiter(
            (
                isinstance(street, str) and bool(street) and street in street_name
                for street in street_names
            ),
            dtype=bool,
            count=n_rows,
        )
        if street_num is not None:
            if isinstance(street_num, str) or not isinstance(street_num, Iterable):
                # turn a single number into a tuple to match places with
                # multiple addresses; a str is a scalar, not a set of chars
                street_num = (street_num, )
            allowed = tuple(street_num)
            mask &= np.fromiter(
                (num in allowed for num in street_numbers),
                dtype=bool,
                count=n_rows,
            )
        return mask

    for name, v in housing_locations.items():
        # complex with multiple addresses - e.g. holden green - comes as a list
        addresses = v if isinstance(v, list) else [v]
        for addr in addresses:
            indices[name] |= _find_idx(addr[0], addr[1])

    for name, idx in indices.items():
        df.loc[idx,'univ_housing_name'] = name

    if indices:
        indices['all'] = np.any(list(indices.values()), axis=0)
    else:
        # no buildings requested: still return a full-length mask
        indices['all'] = np.zeros(n_rows, dtype=bool)
    return indices
Harvard Grad Dorms#
# Harvard GSAS residence halls, named by their street-name spelling (lower case).
gsas_dorms = ["richards hl", "perkins hl", "conant hl", "child hl"]
# Map each hall to (street number, street name); None = no street-number filter.
gsas_dorms = {name: (None, name.upper()) for name in gsas_dorms}
# Also writes the hall name into voters['univ_housing_name'] for matching rows.
gsas_idx = find_housing_idxs(voters, gsas_dorms)
fig, axs = plt.subplots(3,3, figsize=(16,6), sharex=True, sharey=True)
fig.suptitle("Harvard Grad Dorms")
year = 2022
bar_width = .75
# One panel per election, newest first (9 axes -> the 9 most recent years).
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file. The loop extent is reconstructed (source lost its indentation).
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    ax.set_title(f"{year}")
    university_housing_bar_chart(ax, year, gsas_idx['all'], voters)
    # shorten tick labels to the first word of the hall name
    ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
ax.legend()
plt.tight_layout()
df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
# (each (name, year) group holds one row, so mean() leaves the values unchanged)
df = df.groupby(["univ_housing_name", "year"]).mean()
df
# Three panels: registered count, voted count and turnout, one line per hall.
fig, axs = plt.subplots(1,3, figsize=(12,4.5),constrained_layout=True)
for dorm in gsas_dorms.keys():
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[0].set_title("Registered")
    axs[1].set_title("Voted")
    axs[2].set_title("Turnout %")
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm.split()[0])
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm.split()[0])
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
Harvard Law School#
# Harvard Law School dorms, named by their street-name spelling (lower case).
hvd_law_dorms = ["dane hl", "ames hl", "shaw hl", "story hl", "holmes hl", "hastings hl", "north hl"]
# Map each dorm to (street number, street name); None = no street-number filter.
hvd_law_dorms = {name: (None, name.upper()) for name in hvd_law_dorms}
# Also writes the dorm name into voters['univ_housing_name'] for matching rows.
hvd_law_idx = find_housing_idxs(voters, hvd_law_dorms)
fig, axs = plt.subplots(3,3, figsize=(16,6), sharex=True, sharey=True)
plt.suptitle("Harvard Law Dorms Turnout")
# One panel per election, newest first (9 axes -> the 9 most recent years).
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file. The loop extent is reconstructed (source lost its indentation).
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    ax.set_title(f"{year}")
    university_housing_bar_chart(ax, year, hvd_law_idx['all'], voters)
    ax.tick_params(axis='x', labelrotation=90)
    # shorten tick labels to the first word of the dorm name
    ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
ax.legend()
plt.tight_layout()
df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
df = df.groupby(["univ_housing_name", "year"]).mean()
# Three panels: registered count, voted count and turnout, one line per dorm.
fig, axs = plt.subplots(1,3, figsize=(12,4.5),constrained_layout=True)
for dorm in hvd_law_dorms.keys():
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[0].set_title("Registered")
    axs[1].set_title("Voted")
    axs[2].set_title("Turnout %")
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm.split()[0])
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm.split()[0])
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
Harvard Housing#
Some of the larger buildings are still missing; see the map here: https://osa.gse.harvard.edu/files/gse-osa/files/hu_housing_map.pdf
It would also be useful to normalize by the number of units in each building.
# Harvard University Housing buildings: name -> (street number(s), street name).
# A street number of None means "any number on that street"; a list value
# describes a complex that spans several addresses.
harvard_housing = {
    "Peabody Terrace":(None, "PEABODY TER"),
    "Holden Green": [(None, "HOLDEN GRN"), (list(range(10, 38+2,2)), "HOLDEN ST")], # multiple address here. the func will handle this
    "29 Garden St": (29, "GARDEN ST"),
    "Botanic Gardens": (None, "FERNALD DR"),
    "Kirkland Court": ((37, 39, 31), "KIRKLAND ST"),
    "10 Akron": (10, "AKRON ST"),
    "Ware St" : ((9, 11, 13, 15, 17 ,19), "WARE ST"), # as it stands the function won't differentiate between 13 and 13A ware st so should pick up both
    "Prescott" : (list(range(85, 95+1,2)), "PRESCOTT ST")
}
# TODO: Haskins Hall, Beckwith Circle, Terry Terrace
# Also writes the building name into voters['univ_housing_name'] for matching rows.
harvard_housing_idx = find_housing_idxs(voters, harvard_housing)
fig, axs = plt.subplots(3,3, figsize=(17,8), sharex=True, sharey=True)
plt.suptitle("Harvard University Housing Turnout")
# One panel per election, newest first (9 axes -> the 9 most recent years).
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file. The loop extent is reconstructed (source lost its indentation).
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    ax.set_title(f"{year}")
    university_housing_bar_chart(ax, year, harvard_housing_idx['all'], voters)
    ax.tick_params(axis='x', labelrotation=90)
ax.legend()
plt.tight_layout()
df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
df = df.groupby(["univ_housing_name", "year"]).mean()
# Three panels: registered count, voted count and turnout, one line per building.
fig, axs = plt.subplots(1,3, figsize=(16,6),constrained_layout=True)
for dorm in harvard_housing.keys():
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[0].set_title("Registered")
    axs[1].set_title("Voted")
    axs[2].set_title("Turnout %")
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm.split()[0])
    # full building name here so the legend on the last panel is unambiguous
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm)
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
Undergrad houses#
# Harvard undergraduate houses; " House" is appended to build the full name.
harvard_ugrad_houses = [h+" House" for h in ["Leverett", "Pforzheimer", "Adams", "Currier", "Cabot", "Dunster", "Eliot", "Kirkland", "Lowell", "Mather", "Quincy", "Winthrop"]]
# Map each house to (street number, street name); None = no street-number filter.
harvard_ugrad_houses = {name: (None, name.upper()) for name in harvard_ugrad_houses}
# Also writes the house name into voters['univ_housing_name'] for matching rows.
harvard_ugrad_idx = find_housing_idxs(voters, harvard_ugrad_houses)
fig, axs = plt.subplots(3,3, figsize=(16,6), sharex=True, sharey=True)
plt.suptitle("Harvard Undergrad house turnout")
year = 2022
bar_width = .75
# One panel per election, newest first (9 axes -> the 9 most recent years).
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file. The loop extent is reconstructed (source lost its indentation).
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    ax.set_title(f"{year}")
    university_housing_bar_chart(ax, year, harvard_ugrad_idx['all'], voters)
    ax.tick_params(axis='x', labelrotation=90)
    # shorten tick labels to the first word of the house name
    ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
ax.legend()
plt.tight_layout()
df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
df = df.groupby(["univ_housing_name", "year"]).mean()
# Three panels: registered count, voted count and turnout, one line per house.
fig, axs = plt.subplots(1,3, figsize=(16,6),constrained_layout=True)
for dorm in harvard_ugrad_houses.keys():
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[0].set_title("Registered")
    axs[1].set_title("Voted")
    axs[2].set_title("Turnout %")
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm.split()[0])
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm)
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
1st year#
# Harvard first-year dorms; " HL" is appended to build the street-name spelling.
# ("Wigglesworth" previously carried a trailing space, which produced a
# double-spaced "WIGGLESWORTH  HL" that could never match an address.)
# NOTE(review): "Mass Hall HL" looks unlikely to match the voter-list
# spelling -- confirm against the data.
harvard_1st_year = [h+" HL" for h in [
    "Greenough",
    "Hurlbut",
    "Pennypacker",
    "Wigglesworth",
    "Grays",
    "Matthews",
    "Weld",
    "Apley",
    "Hollis",
    "Holworthy",
    "Lionel",
    "Mass Hall",
    "Mower",
    "Stoughton",
    "Straus",
    "Canaday",
    "Thayer",
]]
# Map each dorm to (street number, street name); None = no street-number filter.
harvard_1st_year = {name: (None, name.upper()) for name in harvard_1st_year}
# Also writes the dorm name into voters['univ_housing_name'] for matching rows.
harvard_1st_idx = find_housing_idxs(voters, harvard_1st_year)

fig, axs = plt.subplots(3,3, figsize=(16,6), sharex=True, sharey=True)
plt.suptitle("Harvard 1st year dorm turnout")
year = 2022
bar_width = .75
# One panel per election, newest first (9 axes -> the 9 most recent years).
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file.
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    ax.set_title(f"{year}")
    # use the first-year mask (this used to plot the undergrad-house mask)
    university_housing_bar_chart(ax, year, harvard_1st_idx['all'], voters)
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
ax.legend()
plt.tight_layout()

df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
df = df.groupby(["univ_housing_name", "year"]).mean()

# Three panels: registered count, voted count and turnout, one line per dorm.
fig, axs = plt.subplots(1,3, figsize=(16,6),constrained_layout=True)
axs[0].set_title("Registered")
axs[1].set_title("Voted")
axs[2].set_title("Turnout %")
_dorms_with_voters = set(df.index.get_level_values("univ_housing_name"))
# iterate the first-year dorms (this used to iterate the undergrad houses)
for dorm in harvard_1st_year.keys():
    if dorm not in _dorms_with_voters:
        # no registered voter matched this dorm in any year; nothing to plot
        continue
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm.split()[0])
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm)
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
MIT#
Undergrad#
These dorms are recorded under their street addresses rather than their house names.
# MIT undergraduate dorms: name -> (street number(s), street name).
# A tuple of numbers means any of those street numbers belongs to the dorm.
mit_dorms = {
    "Baker House": (362, "MEMORIAL DR"),
    "Burton Conner":(410, "MEMORIAL DR"),
    "East Campus" :(3, "AMES ST"),
    "MacGregor House":(450, "MEMORIAL DR"),
    "Maseeh Hall":(305, "MEMORIAL DR"),
    "McCormick Hall" : (320, "MEMORIAL DR"),
    "New House" : (tuple(range(471, 476+1)), "MEMORIAL DR"),
    "Next House":(500, "MEMORIAL DR"),
    "New Vassar":(189, "VASSAR ST"),
    "Random Hall":(290, "MASSACHUSETTS AVE"),
    "Simmons Hall": (tuple(range(229, 243+1)), "VASSAR ST")
}
# Also writes the dorm name into voters['univ_housing_name'] for matching rows.
mit_ugrad_idx = find_housing_idxs(voters, mit_dorms)
fig, axs = plt.subplots(3,3, figsize=(16,6), sharex=True, sharey=True)
year = 2022
plt.suptitle("MIT Undergrad Dorm Turnout over the years")
bar_width = .75
# One panel per election, newest first (9 axes -> the 9 most recent years).
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file. The loop extent is reconstructed (source lost its indentation).
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    university_housing_bar_chart(ax, year, mit_ugrad_idx['all'], voters)
    ax.set_title(f"{year}")
    ax.tick_params(axis='x', labelrotation=90)
    # NOTE(review): duplicate of the line above
    ax.tick_params(axis='x', labelrotation=90)
    # ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
ax.legend()
plt.tight_layout()
df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
df = df.groupby(["univ_housing_name", "year"]).mean()
# Three panels: registered count, voted count and turnout, one line per dorm.
fig, axs = plt.subplots(1,3, figsize=(16,6),constrained_layout=True)
for dorm in mit_dorms.keys():
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[0].set_title("Registered")
    axs[1].set_title("Voted")
    axs[2].set_title("Turnout %")
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm)
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm)
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
MIT Grad#
Unlike Harvard, MIT has a convenient, easy-to-find table of its graduate residences:
https://studentlife.mit.edu/grad-residences
# MIT graduate residences: name -> (street number, street name).
mit_grad_housing = {
    "70 Amherst": (70, "AMHERST ST"),
    "Ashdown":(235, "ALBANY ST"),
    "Edgerton": (143, "ALBANY ST"),
    "Grad Tower": (45, "HAYWARD ST"),
    "Sidney Pacific": (70, "PACIFIC ST"),
    "Tang Hall" : (550, "MEMORIAL DR"),
    "The Warehouse": (224, "ALBANY ST"),
    # NOTE(review): spelled "DRIVE" here but "DR" everywhere else; it still
    # matches rows recorded as "MEMORIAL DR" only because find_housing_idxs
    # tests whether the row's street is a substring of this name -- confirm.
    "Westgate" : (540, "MEMORIAL DRIVE")
}
# Also writes the residence name into voters['univ_housing_name'] for matching rows.
mit_grad_idx = find_housing_idxs(voters, mit_grad_housing)
# 4x3 grid: 12 panels, one for each year in `years`
fig, axs = plt.subplots(4,3, figsize=(16,6), sharex=True, sharey=True)
year = 2022
plt.suptitle("MIT Grad Housing Turnout")
bar_width = .75
# One panel per election, newest first.
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file. The loop extent is reconstructed (source lost its indentation).
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    university_housing_bar_chart(ax, year, mit_grad_idx['all'], voters)
    ax.set_title(f"{year}")
    ax.tick_params(axis='x', labelrotation=90)
    # NOTE(review): duplicate of the line above
    ax.tick_params(axis='x', labelrotation=90)
    # ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
ax.legend()
plt.tight_layout()
df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
df = df.groupby(["univ_housing_name", "year"]).mean()
# Three panels: registered count, voted count and turnout, one line per residence.
fig, axs = plt.subplots(1,3, figsize=(16,6),constrained_layout=True)
for dorm in mit_grad_housing.keys():
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[0].set_title("Registered")
    axs[1].set_title("Voted")
    axs[2].set_title("Turnout %")
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm)
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm)
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
Lesley#
todo
https://lesley.edu/students/housing/residence-halls
cool interactive map here: https://map.concept3d.com/?id=31#!ce/238?m/3276?s/
# Lesley University residence halls: name -> (street number(s), street name).
# A tuple/list of numbers means any of those street numbers belongs to the hall.
lesley_housing = {
    "Doble": (30, "MELLEN ST"),
    "Compass House": (14, "WENDELL ST"),
    "Everett House": (28, "WENDELL ST"),
    "Jenckes House": (31, "MELLEN ST"),
    "Kidder House": ((2,4), "SAINT JOHNS RD"),
    # prefixed to keep it distinct from Harvard's "Kirkland House"
    "[Lesley] Kirkland House": (61, "OXFORD ST"),
    "Kris House": (68, "OXFORD ST"),
    "Lawrence Hall": (99, "BRATTLE ST"),
    "MacKenzie Hall": (36, "MELLEN ST"),
    "Malloch Hall": (38, "MELLEN ST"),
    "Mellen House": (24, "MELLEN ST"),
    "Rousmaniere House": (6, "SAINT JOHNS RD"),
    "Wendell House": (63, "OXFORD ST"),
    "White Hall": (33, "EVERETT ST"),
    "Wilbur House": (78, "OXFORD ST"),
    "Wilson House": ((16,18), "WENDELL ST"),
    "Winthrop Hall": (list(range(1,7+1, 2)), "SAINT JOHNS RD"),
    "Wolfard Hall": (34, "MELLEN ST"),
}
# Also writes the hall name into voters['univ_housing_name'] for matching rows.
lesley_housing_idx = find_housing_idxs(voters, lesley_housing)
# 4x3 grid: 12 panels, one for each year in `years`
fig, axs = plt.subplots(4,3, figsize=(16,6), sharex=True, sharey=True)
year = 2022
plt.suptitle("Lesley Housing Turnout")
bar_width = .75
# One panel per election, newest first.
# NOTE(review): relies on `years` from an earlier cell and on
# `university_housing_bar_chart`, which is not defined in the visible part of
# this file. The loop extent is reconstructed (source lost its indentation).
for i, ax in enumerate(axs.reshape(-1)):
    year = years[-i-1]
    university_housing_bar_chart(ax, year, lesley_housing_idx['all'], voters)
    ax.set_title(f"{year}")
    ax.tick_params(axis='x', labelrotation=90)
    # NOTE(review): duplicate of the line above
    ax.tick_params(axis='x', labelrotation=90)
    # ax.set_xticklabels([l.get_text().split()[0] for l in ax.get_xticklabels()])
ax.legend()
plt.tight_layout()
df = turnout_by_year_key(voters, "univ_housing_name")
# groupby is the easiest way i could see to get the order I wanted
# couldn't get the reorder levels to work properly :/
df = df.groupby(["univ_housing_name", "year"]).mean()
# Three panels: registered count, voted count and turnout, one line per hall.
fig, axs = plt.subplots(1,3, figsize=(16,6),constrained_layout=True)
for dorm in lesley_housing.keys():
    axs[0].plot(df.loc[dorm]['registered'], 'o--', label=dorm.split()[0])
    axs[0].set_title("Registered")
    axs[1].set_title("Voted")
    axs[2].set_title("Turnout %")
    axs[1].plot(df.loc[dorm]['voted'], 'o--', label=dorm)
    axs[2].plot(df.loc[dorm]['turnout'], 'o--', label=dorm)
# pyplot-level calls: these act on the current axes only
plt.ylabel("Turnout %")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
# Exploratory checks (notebook display expressions, no lasting effect
# apart from rebinding `df`).
# How many 2012 voters were matched to each Lesley hall:
np.unique(voters[lesley_housing_idx['all']].loc[2012]['univ_housing_name'], return_counts=True)
# Here `df` is indexed by (year, univ_housing_name) -- no reordering groupby.
df = turnout_by_year_key(voters, "univ_housing_name")
# 2011 counts for one Harvard Law dorm
df.loc[2011, "shaw hl"]
# hall name of every 2012 voter matched to Lesley housing
voters[lesley_housing_idx['all']].loc[2012]['univ_housing_name']